library("readxl")
library(dplyr)
library(ggplot2)
library(tidyverse)
library(ggthemes)
library(sf)
library(DT)
# Load the Kickstarter project export (path is relative to the working directory).
ks <- read.csv("kickstarter_projects_2021-03.csv")

# Average achievement ratio (pledged as a percentage of goal) for each top-level
# category, keeping the 10 categories with the highest average.
# NOTE: despite the "_by_state" suffix this table is keyed by `top_category`;
# the name is kept because the plotting code below refers to it.
ks_most_successful_by_state <- ks %>%
  select(top_category, pledged, goal) %>%
  # A zero goal would make the ratio Inf/NaN and a missing goal would make it NA,
  # either of which would corrupt the category mean; filter() drops both cases.
  filter(goal > 0) %>%
  mutate(achievement_ratio = round(pledged / goal * 100, 1)) %>%
  group_by(top_category) %>%
  summarise(
    average_achievement_ratio = mean(achievement_ratio, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # slice_max() replaces the superseded top_n(); it names the ranking column
  # explicitly, keeps ties, and returns rows in descending order.
  slice_max(average_achievement_ratio, n = 10)

datatable(ks_most_successful_by_state)
# Bar chart of the average achievement ratio per top category, with the bars
# ordered along the x axis by ascending ratio.
ggplot(
  data = ks_most_successful_by_state,
  mapping = aes(
    x = reorder(top_category, average_achievement_ratio),
    y = average_achievement_ratio
  )
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(fill = "orange") +
  labs(
    title = "Top 10 Most Successful Top Categories in Attracting Funding by Achievement Ratio",
    y = "Average Achievement Ratio",
    caption = "Source:Kickstarter"
  ) +
  theme_pander() +
  # The x-axis title is suppressed; the category labels on the ticks suffice.
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 11, face = "bold"),
    axis.text.x = element_text(size = 9, face = "bold"),
    axis.text.y = element_text(size = 8, face = "bold")
  )
Finding: The graph above shows that Music is the most successful top category at attracting funding in terms of achievement ratio. The top 10 categories are concentrated in entertainment and the arts, such as music, comics, design, games, publishing, art, film & video, crafts, and fashion.
Note: I use the state variable to identify the most innovative states and the most innovative cities. Top 10 most innovative states: the states with the most projects whose state is “successful”. Top 50 most innovative cities: the cities with the most projects whose state is “successful”.
# Number of successful projects per state, largest first. The state column is
# renamed to STUSPS so it can later be merged with the state shapefile.
total_successful_case_per_state <- ks %>%
  filter(state == "successful") %>%
  count(location_state, name = "Total Successful Cases", sort = TRUE) %>%
  rename(STUSPS = location_state)

# Number of successful projects per city, largest first. Cities are counted per
# (state, town) pair: many towns share a name across states, and counting by
# town name alone would merge them and attach an arbitrary state to the total.
# NOTE: the cached "city_long_lat" file read later in this script was derived
# from an earlier ranking; regenerate it if the top 50 changes.
total_successful_case_per_city <- ks %>%
  filter(state == "successful") %>%
  count(location_state, location_town, name = "Total Successful Cases", sort = TRUE)

# Assign ranks (1 = most successful cases) to states and cities. The rank vector
# is sized from the data rather than hard-coded, so the script keeps working
# when the number of states or cities in the input changes. data.frame() turns
# the count column name into the syntactic form `Total.Successful.Cases`.
most_innovative_state_rank <- data.frame(
  rank = seq_len(nrow(total_successful_case_per_state)),
  total_successful_case_per_state
)
most_innovative_city_rank <- data.frame(
  rank = seq_len(nrow(total_successful_case_per_city)),
  total_successful_case_per_city
)
datatable(most_innovative_state_rank)

# Select the top 50 most innovative cities; head() avoids the all-NA padding
# rows that `[1:50, ]` would create if fewer than 50 cities were present.
top_50_innovative_city <- head(most_innovative_city_rank, 50)

# Combine town and state into a single "Town ST" key so that the matching
# location can be selected unambiguously from the city shapefile.
top_50_innovative_city <- top_50_innovative_city %>%
  unite(city_state, location_town, location_state, sep = " ")

# Saint Louis MO is spelled differently in the Kickstarter data and in the city
# shapefile; use the shapefile spelling so the two can be matched.
top_50_innovative_city <- top_50_innovative_city %>%
  mutate(
    city_state = replace(city_state, city_state == "St. Louis MO", "Saint Louis MO")
  )
datatable(top_50_innovative_city)
library(leaflet)
# library() fails loudly if sp is missing, unlike require(), which only
# returns FALSE. sp supplies the Spatial classes and the merge() method below.
library(sp)

# Read the US state boundaries. rgdal (the source of readOGR()) has been retired
# from CRAN, so the shapefile is read with sf and converted to an sp
# SpatialPolygonsDataFrame; the `@data` slot and sp's merge() used below keep
# working unchanged. The original run reported 56 features and 14 fields.
states <- sf::as_Spatial(
  sf::st_read("tl_2017_us_state/tl_2017_us_state.shp", quiet = TRUE)
)

# Attach the state ranking by postal abbreviation. Features that have no row in
# the ranking end up with an NA rank, which is handled when labelling below.
states <- merge(states, most_innovative_state_rank, by = "STUSPS")

# Two-class label used for the map fill and legend: rank 1-10 versus everything
# else (including features with no rank).
states$top10_states <- ifelse(
  !is.na(states$rank) & states$rank <= 10,
  "Top 10 innovative states",
  "None top 10 innovative states "
)
# Coordinates of the most innovative cities come from a cached file. The
# commented-out code below records how that file was produced from a US city
# shapefile: build the same "NAME STATE" key, merge with the top-50 table, keep
# the matched rows and write their attribute table to disk.
# town <- readOGR(dsn = "tufts-uscitiestowns1mil14-shapefile/GISPORTAL_GISOWNER01_USCITIESTOWNS1MIL14.shp")
# town@data<-town@data %>% unite(city_state,c("NAME","STATE"),sep=" ",remove = FALSE)
# city<-merge(town,top_50_innovative_city,by="city_state")
# city@data<-city@data %>% filter(!is.na(rank))
# write.csv(x=city@data, file="city_long_lat")
city_long_lat <- read.csv(file = "city_long_lat")

# Popup HTML for the state polygons: one string per row of the attribute table.
content_states <- paste(
  "state:", states@data[["NAME"]], "<br/>",
  "Rank:", states@data[["rank"]], "<br/>"
)

# Popup HTML for the city markers: one string per row of the cached city table.
content_city <- paste(
  "city:", city_long_lat[["city_state"]], "<br/>",
  "Rank:", city_long_lat[["rank"]], "<br/>"
)
library(RColorBrewer)

# Two-colour categorical palette keyed on the top-10 label.
# NOTE(review): with a character domain the factor levels are presumably taken
# in sorted order, which would give "None top 10 innovative states " steelblue
# and "Top 10 innovative states" lightcyan - confirm this emphasis is intended.
colorsConst <- colorFactor(
  palette = c("steelblue", "lightcyan"),
  domain = states$top10_states
)

# Interactive map with two toggleable overlays: state polygons filled by the
# top-10 label (with a legend) and circle markers for the top 50 cities.
# Every argument is passed explicitly; the stray empty arguments the calls
# previously carried (a trailing comma and a doubled comma) have been removed.
leaflet() %>%
  addProviderTiles("OpenStreetMap.Mapnik") %>%
  setView(lat = 37, lng = -95, zoom = 4) %>%
  addPolygons(
    data = states,
    group = "Top 10 most innovative states",
    stroke = TRUE,
    smoothFactor = 0.5,
    weight = 1,
    color = "#333333",
    opacity = 1,
    fillColor = ~colorsConst(top10_states),
    fillOpacity = 1,
    popup = content_states
  ) %>%
  addLegend(
    position = "bottomright",
    pal = colorsConst,
    values = states$top10_states,
    title = "Top 10 Innovative States",
    opacity = 1,
    group = "Top 10 most innovative states"
  ) %>%
  addCircleMarkers(
    data = city_long_lat,
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    group = "Top 50 most innovative cities",
    stroke = FALSE,
    fillOpacity = 1,
    color = "orange",
    popup = content_city
  ) %>%
  addLayersControl(
    overlayGroups = c(
      "Top 10 most innovative states",
      "Top 50 most innovative cities"
    ),
    options = layersControlOptions(collapsed = FALSE)
  )